from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
import csv
import pymysql
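# Scrape the book list at https://www.ryjiaoyu.com/book (title, author,
# price, detail link) and batch-insert the rows into a local MySQL table.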
def get_html(url):
    """Fetch a page and return its decoded text, or None on failure."""
    try:
        # Present a desktop-browser User-Agent so the site serves the normal page
        headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36'}
        r = requests.get(url=url, headers=headers)
        r.raise_for_status()
        # Decode with the encoding detected from the response body
        r.encoding = r.apparent_encoding
        return r.text
    except Exception as error:
        print(error)
        return None
def parser(html):
    """Extract [title, author, price, absolute link] rows from the book list."""
    soup = BeautifulSoup(html, "lxml")
    out_list = []
    base_url = "https://www.ryjiaoyu.com/book"
    for i in soup.select("#tab-book > div.col-md-8.col-sm-8.main > div.g-main > div > ul > li"):
        shuming = i.select("div.book-info > h4 > a")[0].text.strip()      # book title
        zuozhe = i.select("div.book-info > div > span")[0].text.strip()   # author
        jiage = i.select("div.book-info > span > span")[0].text.strip()   # price
        lianjie = i.select("div.book-info > h4 > a")[0].attrs['href']     # relative href
        # urljoin (not urlparse) resolves the relative href against the base URL
        new_url = urljoin(base_url, lianjie)
        out_list.append([shuming, zuozhe, jiage, new_url])
    return out_list
def save_mysql(sql, val, **dbinfo):
    """Batch-insert rows inside one transaction; roll back on error."""
    connect = pymysql.connect(**dbinfo)
    cursor = connect.cursor()
    try:
        cursor.executemany(sql, val)
        connect.commit()
    except Exception as err:
        connect.rollback()
        print(err)
    finally:
        # Always release resources; the original called rollback() here by mistake
        cursor.close()
        connect.close()
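# The insert below assumes a table like the following already exists in db
# "aaa" (a sketch; the column types are guesses, not given in the source):
#   CREATE TABLE scordb (
#       shuming VARCHAR(255),   -- book title
#       zuozhe  VARCHAR(255),   -- author
#       jiage   VARCHAR(64),    -- price, stored as the scraped text
#       lianjie VARCHAR(512)    -- absolute link to the book's detail page
#   );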
if __name__ == '__main__':
    url = 'https://www.ryjiaoyu.com/book'
    html = get_html(url)
    if html:  # only parse and save if the request succeeded
        out_list = parser(html)
        params = {
            "host": "127.0.0.1",
            "user": "root",
            "password": "root",
            "db": "aaa",
            "charset": "utf8",
            "cursorclass": pymysql.cursors.DictCursor
        }
        sql = "insert into scordb(shuming,zuozhe,jiage,lianjie) " \
              "values(%s,%s,%s,%s)"
        save_mysql(sql, out_list, **params)
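        # Optional sanity check (a sketch, reusing the same connection params):
        #   connect = pymysql.connect(**params)
        #   with connect.cursor() as cur:
        #       cur.execute("select count(*) from scordb")
        #       print(cur.fetchone())
        #   connect.close()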